In [2]:
import numpy as np #linear algebra
import pandas as pd # data processing,CSV file I/O(e.g pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
In [3]:
data = sns.load_dataset("iris")
data
Out[3]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns

In [4]:
data.head()
Out[4]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [5]:
data.shape
Out[5]:
(150, 5)
In [6]:
data.duplicated().sum()
Out[6]:
1
In [7]:
data.drop_duplicates(inplace=True)
In [8]:
data.duplicated().sum()
Out[8]:
0
In [9]:
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  149 non-null    float64
 1   sepal_width   149 non-null    float64
 2   petal_length  149 non-null    float64
 3   petal_width   149 non-null    float64
 4   species       149 non-null    object 
dtypes: float64(4), object(1)
memory usage: 7.0+ KB
In [10]:
data.columns
Out[10]:
Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')
In [11]:
#VISUALIZATION
In [12]:
plt.bar(data['species'],data['sepal_width'])
plt.xticks(rotation=90)
plt.show()
In [13]:
fig=px.bar(data,x='sepal_width',y='sepal_length',color='sepal_width')
fig.show()
In [14]:
fig=px.violin(data,x='petal_length',y='petal_width',color='petal_length')
fig.show()
In [15]:
plt.figure(figsize=(10,4))
sns.countplot(x='petal_length', data=data, color='b')
plt.title('petal_length')
plt.show()
In [16]:
plt.figure(figsize=(10,4))
top_car = data['petal_width'].value_counts().nlargest(10)
sns.countplot(y=data.petal_width, order=top_car.index, color='red')
Out[16]:
<AxesSubplot:xlabel='count', ylabel='petal_width'>
In [17]:
sns.lineplot(x='sepal_length', y='species', data=data)
Out[17]:
<AxesSubplot:xlabel='sepal_length', ylabel='species'>
In [18]:
sns.barplot(data['petal_length'],data['sepal_length'],color='r')
plt.xticks(rotation=90)
plt.show()
D:\anaconda files\lib\site-packages\seaborn\_decorators.py:36: FutureWarning:

Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

In [ ]:
 
In [19]:
sns.displot(data["species"])
Out[19]:
<seaborn.axisgrid.FacetGrid at 0x19415af7580>
In [20]:
sns.countplot(x='sepal_width',data=data)
plt.xticks(rotation=90)
Out[20]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22]),
 [Text(0, 0, '2.0'),
  Text(1, 0, '2.2'),
  Text(2, 0, '2.3'),
  Text(3, 0, '2.4'),
  Text(4, 0, '2.5'),
  Text(5, 0, '2.6'),
  Text(6, 0, '2.7'),
  Text(7, 0, '2.8'),
  Text(8, 0, '2.9'),
  Text(9, 0, '3.0'),
  Text(10, 0, '3.1'),
  Text(11, 0, '3.2'),
  Text(12, 0, '3.3'),
  Text(13, 0, '3.4'),
  Text(14, 0, '3.5'),
  Text(15, 0, '3.6'),
  Text(16, 0, '3.7'),
  Text(17, 0, '3.8'),
  Text(18, 0, '3.9'),
  Text(19, 0, '4.0'),
  Text(20, 0, '4.1'),
  Text(21, 0, '4.2'),
  Text(22, 0, '4.4')])
In [21]:
sns.boxplot(x='sepal_width',y='petal_width',data=data)
Out[21]:
<AxesSubplot:xlabel='sepal_width', ylabel='petal_width'>
In [22]:
#MODEL BUILDING
In [23]:
X = data[['sepal_length','sepal_width','petal_length','petal_width']]
X.head()
Out[23]:
sepal_length sepal_width petal_length petal_width
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
In [24]:
y = data['species']
y.head()
Out[24]:
0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: species, dtype: object
In [25]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

y=le.fit_transform(y)
In [26]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.3)
In [27]:
from sklearn.ensemble import AdaBoostClassifier

abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0)

model = abc.fit(X_train, y_train)

y_pred = model.predict(X_test)
In [32]:
from sklearn.metrics import accuracy_score

print("AdaBoost Classifier Model Accuracy:",accuracy_score(y_test, y_pred))
AdaBoost Classifier Model Accuracy: 0.9555555555555556
In [33]:
data.to_csv("adaboost 1.csv")
In [ ]: